####################################### DATA #######################################

sessionInfo()                                 # list of packages currently attached
library()                                     # see packages currently installed
library(help="datasets")                      # Information on package 'datasets'
data(package="datasets")                      # get info on data in the 'dataset' package 
AirPassengers                                 # data set AirPassenger from the 'dataset'
                                              # package is immediately available
                                              #              ATTENTION:
                                              # discrepancies between R version and package
                                              # version might require to load the data first
                                              #       use "data(NAME.DATA)" for version >= 2.0.0    
data(package=.packages(all.available = TRUE)) # get info on data from all the data sets 
                                              # in packages already installed
data()                                        # list of data sets (mostly data frames) in all 
                                              # packages that are attached to the session
demo(graphics)                                # see some of the possibilities that R offers
q()                                           # quit current session and save workspace

############### Importing/Exporting Data in R #######################################

my.hills<-read.table("hills.txt",header=TRUE) # read a text file into data frame
head(my.hills, n=3)                                      # same as 'hills' from MASS

Year<-c(1800,1850,1900,1950,2000)             
Carbon<-c(8,54,534,1630,6611)                 
write.table(                                  # write a data frame into a csv file
  data.frame(year=Year,carbon=Carbon),        # most spreadsheet can export/import csv files
  file="fuel.csv",row.names=F,sep=",")
fossifuel<-read.csv("fuel.csv")               # reverse action
fossifuel  						    # return a data frame storing the data
fossifuel<-read.table("fuel.csv",             # the same
                      header=TRUE,sep=",")    # read a text file with columns separated 
fossifuel					          # by commas into a data frame

write(Carbon,file="Carbon.txt")               # write a vector into a text file
scan("Carbon.txt")                            # read a text file into a vector

matrix(1:10,ncol=2)
write(matrix(1:10,ncol=2),file="x.mat.txt")   # write a matrix into a text file, 
                                              # ATTENTION:columns become rows
scan("x.mat.txt")                             # ATTENTION: it creates a vector
matrix(scan("x.mat.txt"),ncol=2)              # to get back a matrix use 'matrix()' 

x<-c(1)
data.entry(x)					    # spreadsheet interface for editing data
a.df<-data.frame()                            # dummy data frame
fix(a.df)                                     # make permanent changes to a.df
a.df
edit(a.df)                                    # 'edit' does not change the object 'a.df'
                                              # instead, a copy is made and that copy 
                                              # is changed 

############### Demo: Simple Data Analysis ############################################# 
library(MASS)                                 # attach the package MASS 
                                              # all data set of MASS are now accessible
hills                                         # display the contents of the data frame "hills"
average<-sapply(hills,mean)                          # arithmetic means of the three columns of data
average                                       # print the content of the object 'average'
range(hills$time)                             # range of the values in the vector 'time'
plot(time ~ dist, data=hills, pch=16)         # plot the data
out<-lm(time ~ dist, data=hills)              # fit the simple linear model: 
                                              # time=b_0+b_1*dist+error 
summary(out)                                  # show results of regression
plot(out)                                     # display of regression fitting and diagnostics
b<-out$coeff                                  # estimates of coefficients
plot(time ~ dist, data=hills, pch=16) 
abline(b[1],b[2],col=2)                       # actual data vs fitted regression line
                                              # first argument = intercept,
                                              # second argument = slope
points(average[1],average[3],
       pch=17,col=2,cex=1.5)                  # plot average values of 'dist' and 'time'
abline(v=average[1],lty=2)                    ## v = vertical line at mean(dist), lty tratteggita
abline(h=average[3],lty=2)                    ## h = horizontal line at mean(time)

############### Vectors #######################################

coeffs<-c(1,-3,2)                             # vector containing the coefficients of the 
							    # equation x^2-3x+2=0	
class(coeffs)                                 # class of the object "coeffs"
length(coeffs)                                # dimension of the vector
names(coeffs)                                 # names of the elements of the vector
names(coeffs)<-c("a","b","c")                 # modified names
class(c("a","b","c"))                         # class is character
coeffs                                        # display all elements of 'coeffs'
coeffs[2]                                     # display 2nd element of 'coeffs'
coeffs["b"]                                   # the same
discr<-coeffs[2]^2-4*coeffs[1]*coeffs[3]      # discriminant of the quadratic equation b^2-4ac
discr>0                                       #     b^2-4ac>0 gives a logical value
x<-seq(0,3,length=200)                        ## regular spaced points as values of x
y<-coeffs[1]*x^2+coeffs[2]*x+coeffs[3]        # images of x under the quadratic function
                                              #     y=f(x)=a*x^2+b*x+c
plot(x,y,type="l",ylab=expression(x^2-3*x+2)) # plot as a line 
abline(h=0,lty=2)                             # intersections with the first axis
                                              # are solutions of the equation

seq(0,3,length=200)                           # regular spaced points = sequenza da 0 a 3 di 200 punti
-2:2                                          # sequence of integers
x<-vector("numeric",length=100)               # create an empty numeric vector of 100
c(x,x)                                        # combine two vectors    

ax<- 1; bx<- -3; cx<- 2
ax-bx+2*cx                                    # [1] 8
bx/bx*cx/ax+bx                                # [1] -1
(bx/bx)*(cx/ax)+bx                            # better
bx^cx                                         # -3 raised to 2

log(cx); log(cx,exp(1)); log(x=cx,base=exp(1))# all the same, log(2)
x<-cx/(-bx)                                   # [1] 0.6666...
round(x,2); trunc(x); ceiling(x); floor(x)    ##

x<-seq(1,3,length=21)                         # [1] 1 1.1 1.2 ... 2.8 2.9 3
min(x); max(x); mean(x)
sum(x); prod(x)
cumsum(x) 
cumprod(x)
sum(x)/length(x)                              # arithmetic mean, equal to mean(x)
prod(1:5)                                     ## factorial
factorial(1:5)                                ## factorial
sum((x-mean(x))^2)/(length(x)-1)              # sample variance
var(x)						    # sample variance

############### Logic #######################################

x<- 1:5					          # [1] 1 2 3 4 5
x < 5                                         # is x less than 5?
x > 1                                         # is x bigger than 5?
x > 1 & x < 5                                 # is x bigger than 1 and less than 5?
x > 1 && x < 5                                # first one is false
x > 1 | x < 5                                 # is x bigger than 1 or less than 5?
x > 1 || x < 5                                # first one is true
x == 3                                        # is x equal to 3?
x != 3                                        # is x not equal to 3?
!x == 3                                       # not (x equal to 3)
!(x == 3)                                     # better
A<- x > 1 
B<- x < 5
!( (!A) | (!B) )                              # De Morgan 
A & B
A + 1                                         # logical vector in ordinary arithmetic
sum(A)                                        # count numbers of TRUE in A
any(A)                                        # is there any TRUE in A
sum(A) > 1                                    # the same
all(A)                                        # are all elements of A TRUE 
sum(A) == length(A)                           # the same
prod(A) == 1                                  # the same
which(B)                                      # which elements of B are TRUE

############### Character vectors and factors #######################################

A<- c("beer","beer","wine","water")           
B<- c("beer","wine")                          # two vectors of class character
A
table(A)						    			  # finds the unique values and tabulates the frequencies of their occurrences
union(A,B)                                    # no duplicates value
intersect(A,B)
setdiff(A,B)

drinks<-factor(A)                             # coerce a character vector to a factor
drinks						  				  # it is used to store categorical data
							           	    # with a specific set of values called levels
table(drinks)				    		      # operates the same
unclass(drinks)                               # stored as integer vector
levels(drinks)				  		        # returns the unique values (useful)
names(table(A))							    # the same
alcoholic<-factor(B)
union(drinks,alcoholic)                       # operates the same
intersect(drinks,alcoholic) 

beer<-c(3,4,1,1,3,4,3,3,1,2,1,2)              # daily consumption of 12 peaple  
wine<-c(2,0,1,4,2,0,1,2,1,2,0,2)              # of beer and wine glasses 
table(beer,wine)					   			 # table() by default converts numeric vectors to factors
PIPPO<-round( table(beer,wine)/length(beer),2 )      # divide by 12, round to the second digit
sum(PIPPO)

############### Data frames #######################################

#write.table(Cars93.summary,file="Cars93.summary.txt",row.names=T,sep=" ")
Cars93.summary<-read.table("Cars93.summary.txt",header=TRUE) 
	
Cars93.summary                                # all data set of DAAG are in the work space
head(Cars93.summary, n=3)                     # display the first three rows
Cars93.summary[1:3, ]                         # the same
rownames(Cars93.summary)                      # row names
names(Cars93.summary)                         # variable (column) names
summary(Cars93.summary)                       # simple summary statistics: min, max, quartiles
							    # and mean for numeric vectors; levels and
					                # frequencies for factors.

data.frame( X1=1:10, X2=I(letters[1:10]),     # create a data frame
            X3=factor(letters[1:10]) )  
data.frame( 1:10, I(letters[1:10]),           # R attemps to pick row and column names from
            factor(letters[1:10]) )           # the constituent vectors
my.data<-data.frame( 1:10, I(letters[1:10]),
                     factor(letters[1:10]) )             
names(my.data)<-c("X1","X2","X3")             # assign the names to variables
my.data                                       

Min.passengers                                # error, object not found
Cars93.summary$Min.passenger                  # it is now accessed as a vector 
attach(Cars93.summary)                        # attach data frame
Min.passengers					    # now the object is accessed by its name
class(abbrev)                                 # 'abbrev' is a vector of class factor
detach(Cars93.summary)                        # detach data frame

library(MASS)                                 # attach MASS package
data(package="MASS")				    # display dataset in package = cosa c'e' dentro                           
head(Cars93,n=3)                              # display the first three rows of 'Cars93'
Cars93[1:3,c(3,18)]                           # "Type" and "Passengers" are the 3rd and 18th
							    # variables of the data frame Cars93
class(Cars93$Type)			          # factor

attach(Cars93)                                # attach data frame
Cars93$Type
Type
aggregate(Passengers,by=list(Type),min)       # reproduce first variable of 'Cars93.summary'
aggregate(by=list(Type),Passengers,min)       # reproduce first variable of 'Cars93.summary'
aggregate(by=list(Type),Passengers,sum)       # reproduce first variable of 'Cars93.summary'
aggregate(Passengers,by=list(Type),max)       # reproduce first variable of 'Cars93.summary'
Cars93.summary                                # check
detach(Cars93)                                # detach data frame

my.Cars<-stack(Cars93.summary, select= 1:2)   # ACCODAMENTO - concatenates selected columns into a single 
my.Cars                                       # column named "values", and add a factor 
                                              # named "ind" that has names of the concatenated
                                              # columns as levels
unstack(my.Cars)                              # reverse the stacking operation

library(MASS)  
head(hills,n=3)                               # back to "hills" data from MASS package
hills$time<-round(hills$time*60)              # express time in minutes (no decimal)
hills$time                                    # display new version
str(hills)
lapply(hills,max)                             # maximum of each columns as a list
sapply(hills,max)                             # the same as vector (see 'help(lapply)')

science<-read.table("science.txt",header=TRUE)

science[!complete.cases(science),]            # Which rows have missing values in the data 
                                              # frame science (DAAG)?
dim(science)                                  # 1385 observations (rows)
Science<- na.omit(science)                    # Omit any rows that contain missing values
dim(Science)                                  # 1383 observations

############### Lists #######################################
x.lis<-list(a=1:10, b=letters[1:3],           # create a list
            d=factor(rep(letters[1:4],2)))    # 'rep(x,n)' repeat the values in 'x' n times 
x.lis
x.lis$a                                       # object 1 is numeric
x.lis[[2]]                                    # object 2 is of class 'character'
table(x.lis$d)                                # form a table for object 3, a factor

e<-list(e=10:1)
c(x.lis,e)                                    # combine two lists
                                              
############### Matrices #######################################
x.mat<-matrix(1:12,nrow=3,ncol=4)             # create a matrix
x.mat                                         # display 'x.mat'
dim(x.mat)                                    # 'x.mat' is a 3x4-matrix
dimnames(x.mat)<-list(paste("row",1:3,sep=""),# assign names to rows and columns
             paste("col",1:4,sep=""))         # 'paste(x,y)' concatenates elementwise 
                                              # 'x' and 'y' after converting to character
x.mat
matrix( paste(                                # a 4x2-matrix of character arguments
         paste("entry",rep(1:4, 2),sep=" "),  # look at the combined use of 'paste' and 'rep'
        rep(1:2, each=4),sep=","),            # with 'rep(x,each=n)' each element of 'x'  
        nrow=4,ncol=2)                        # is repeated n times.

xx<-cbind(x.mat,x.mat)                        # combine x with itself by columns
xx                                            # 
xxx<-rbind(x.mat,x.mat)                       # combine x with itself by columns
xxx                                           # 
rbind(xx,xxx)                                 # Error: the dimensions are not compatible

Year<-c(1800,1850,1900,1950,2000)             # create vector of time points (in years)
Carbon<-c(8,54,534,1630,6611)                 # production level for each year
plot(Year,Carbon,pch=16,cex=1.5)              # plot Carbon against Year 
                                              # ('pch' is the point character, 'cex' is the 
                                              # size of the point)

fossifuel<-data.frame(year=Year,carbon=Carbon)# create a dataframe
fossifuel                                     # 
fossifuel.mat<-matrix(c(Year,Carbon),nrow=5)  # create a matrix, 
colnames(fossifuel.mat)<-c("year","carbon")   # assign names to columns
fossifuel.mat                                 # compare to data frame (no rows names) 
cbind(Year,Carbon)                            # the same as 'fossifuel.mat', but
                                              # column names are automatically assigned                             
############### Indexing #######################################
x<- 1:10                                      # [1] 1 2 ... 10
names(x)<- letters[x]                         # assign names as "a", "b", ... , "j" 
x[1:3]                                        # elements 1,2,3
x[c(1,10)]                                    # elements 1 and 10
x[c(-1,-2)]                                   # remove 1 and 2
x[x>5]                                        # elements > 5
x[c("a","d")]                                 # elements 1 and 4

x.mat<- matrix(1:100, ncol=10)                # create a (10,10)-matrix
x.mat[1:5,]                                   # first 5 rows, all columns
x.mat[1:4,x[x<3]]                             # check this


############### Computation #######################################

x.mat<- matrix(1:10, ncol=2)                  # create a (5,2)-matrix
x.mat
x.mat + 1                                     # sum by a scalar (recycling rule)
x.mat + x.mat                                 # sum of two matrices
x.mat %*% t(x.mat)                            # matrix product, t() is the transpose function
x.mat %*% x.mat                               # Error: non comformable for multiplication

X<- matrix(sample(1:10,24,replace=T),ncol=6)  # sample 24 times from 1,...,10 with replacement
                                              # then fill the values in a (4,6)-matrix 
dim(X)
y<-c(-1,0.5,0.3,0.2)                          # 4-dimensional vector
t(X) %*% y                                    # matrix product X'y, linear combination of the
                                              # rows of X
crossprod(X,y)                                # the same, more efficient
crossprod(X)                                  # matrix product X'X
crossprod(t(X))                               # matrix product XX', same as 'tcrossprod(X)'

A<-crossprod(t(X))                            # symmetric (4,4)-matrix
v<-eigen(A);v                                 # list with eigenvalues and eigenvectors of A
                                              # 'v$values' and 'v$vectors'
det(A); prod(v$values)                        # det(A)=prod_i(eigenvalues(A)_i)
crossprod(v$vectors[,2])                      # eigenvectors are normalized
crossprod(v$vectors[,1],v$vectors[,2])        # the matrix formed by concatenating 
                                              # eigenvectors is orthogonal 
                                              # (almost: R usenumerical routined for  
                                              # approximating the eigenvectors)
solve(A)                                      # inverse of A
A%*%solve(A)                                  # AA^(-1)= identity matrix
round(A%*%solve(A),2)                         # it is an approximation by numerical routines

P<-v$vectors;P                                # matrix with normalized eigenvectors as columns
apply(P,1,sum)                                # sum row-wise
apply(P,2,sum)                                # sum column-wise
apply(P,2,crossprod)                          # simultaneous check eigenvectors are normalized

############### More on Linear Algebra: decomposition of A #######################################
 
Lambda<-diag(v$values)                        # diagonal matrix with eigenvalues
inv.Lambda<-diag(v$values^(-1))               # inverse of a diagonal matrix
inv.A<-P%*%inv.Lambda%*%t(P)                  # inverse(A)= P*Lambda^{-1}P'
inv.A;solve(A)                                # check

                                              # ATTENTION: A must be positive definite!
square.root.A<-P%*%sqrt(Lambda)%*%t(P)        # A^{1/2}=P*Lambda^{1/2}*P' 
                                              # 'sqrt' operates element-wise 
square.root.A%*%square.root.A;A               # check

B<-P%*%sqrt(Lambda)                           # decomposition A=BB'
crossprod(t(B)); A                            # check
C<-chol(A)                                    # choleski decomposition A=C'C 
C                                             # with C upper triangular
crossprod(C); A                               # check

############### Graphics in R #######################################
primates<-read.table("primates.txt",header=TRUE)
attach(primates)
plot(Brainwt~Bodywt, data=primates)     		
                                              # Place labels on points and axes
plot(Brainwt~Bodywt, xlim=c(0, 300),          # 'xlim' sets the range of x-axis
     xlab="Body weight (kg)",                 # 'xlab' and 'ylab' set the axes labels
     ylab="Brain weight (g)", pch=16)         # 'pch' controls the plotting symbol
text(Brainwt ~ Bodywt,                        # pick labels from row names
     labels=row.names(primates), pos=4)       # 'pos=4', where  1: below, 2: to the left, 
                                              # 3: above, 4: to the right
mtext("Primates data", 3, line=1, cex=1.5)    # add text to the margin, leaving 1-line space
                                              # 'side=3', where 1: $x$-axis, 2: $y$-axis, 
                                              # 3: top, 4: right-vertical axis                            
detach(primates)
############### The use of color ####################################### 

theta <- (1:50)*0.92                          # 1st coordinate is in angle (radiants) 
                                              # DIFFERENT IN THE SCRIPT
plot(theta, sin(theta), col=1:50,             # plot *sin(theta)' (elementwise) against 
     pch=16, cex=4)                           # 'theta' with points of different colors
                                              # 'pch=16' set the symbol as a circle
points(theta, cos(theta), col=51:100,         # add points (theta,cos(theta)) of more  
	   pch=15, cex=4)				    # different colors
                                              # 'pch=15' set the symbol as a square

theta <- (1:400)*(pi/40)                      # 1st coordinate is in angle (radiants) 
plot(theta, sin(theta), type="l", col=2,      # plot 'sin(theta)' as a red line 
     lty=1, lwd=2)                            # 'lty=1' set the line type (solid)
                                              # 'lwd=2' set the line width (default is 1)
lines(theta, cos(theta), col=3,               #  add a green line for cos(theta)
     lty=2, lwd=2)                            # 'lty=2' set the line type as dashed


############### Portability #######################################
Year<-c(1800,1850,1900,1950,2000)             
Carbon<-c(8,54,534,1630,6611)                 
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)   
											  # now Copy on the Clipboard from the File menu,
                                              # or save the file in a specific format, 
                                              # again from the File menu 

jpeg("file.jpg")                              # Jpeg file saved in the directory
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()

pdf("file.pdf")                               # pdf file saved in the directory
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()

postscript("file.ps")                         # Postscript file
plot(Year,Carbon,type="l")                    # as default a ps file is in landscape mode
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()

postscript("file.ps", horizontal=F)           # plot in portrait mode
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()

postscript("file.ps", horizontal=F,
           height=8,width=8)                  # plot in portrait mode and resized
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()

postscript("file.ps", horizontal=F,
           paper="special",height=8,width=8)  # paper size specified by height and width
plot(Year,Carbon,type="l")
mtext("Fossifuel data", 3, line=1, cex=1.5)
dev.off()